六：梯度下降（Gradient Descent） - Fainle的博客

1 . 概述

梯度下降法（英语：Gradient descent）是一个一阶最优化算法，通常也称为最陡下降法，主要使用梯度下降法找到一个函数的局部极小值，必须向函数上当前点对应梯度（或者是近似梯度）的反方向的规定步长（学习率）距离点进行迭代搜索。

2 . SSE

$E = \frac{1}{2}\sum_\mu(y^{\mu}-\hat{y}^{\mu})^2$ $\hat{y} = \sigma(\sum_{i}w_{i}x_{i}^{u})$ $E = \frac{1}{2}\sum_\mu(y^{\mu}- \sigma(\sum_{i}w_{i}x_{i}^{u}) )^2$

3 . 代码实现

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def plot_points(X, y):
    admitted = X[np.argwhere(y==1)]
    rejected = X[np.argwhere(y==0)]
    plt.scatter([s[0][0] for s in rejected], [s[0][1] for s in rejected], s = 25, color = 'blue', edgecolor = 'k')
    plt.scatter([s[0][0] for s in admitted], [s[0][1] for s in admitted], s = 25, color = 'red', edgecolor = 'k')

def display(m, b, color='g--'):
    plt.xlim(-0.05,1.05)
    plt.ylim(-0.05,1.05)
    x = np.arange(-10, 10, 0.1)
    plt.plot(x, m*x+b, color)
    
    
data = pd.read_csv('data.csv', header=None)
x = np.array(data[[0,1]])
y = np.array(data[2])
plot_points(x,y)
plt.show()


def sigmoid(x):
    """
    sigmoid 计算
    s(x) = 1/ (1 + e^-x)
    """
    return 1 / (1 + np.exp(-x))

def output_formula(features, weights, bias):
    """
    拟合函数
    x : 特征
    w : 权重
    b : 偏移
    y = s(Wx) + b
    """
    return sigmoid(np.dot(features, weights) + bias)

def error_formula(y, output):
    """
    错误率计算公式
    y : 预测值
    f : output_formula
    error = -ylog(f)-(1-y)log(1-f)
    """
    return np.dot(-y, np.log(output)) - np.dot((1 - y), np.log(1-output))
#     return -y * np.log(f) - (1 - y) * np.log(1 - f)

def update_weights(x, y, weights, bias, learnrate=0.1):
    """
    更新权重
    """
    output = output_formula(x, weights, bias)
    d_error = -(y - output)
    weights -= learnrate * d_error * x
    bias -= learnrate * d_error
    return weights, bias

def train(features, targets, epochs, learnrate, graph_lines=False):
    
    errors = []
    n_records, n_features = features.shape
    last_loss = None
    weights = np.random.normal(scale=1 / n_features**.5, size=n_features)
    bias = 0
    for e in range(epochs):
        del_w = np.zeros(weights.shape)
        for x, y in zip(features, targets):
            output = output_formula(x, weights, bias)
            error = error_formula(y, output)
            weights, bias = update_weights(x, y, weights, bias, learnrate)
        
        # Printing out the log-loss error on the training set
        out = output_formula(features, weights, bias)
        loss = np.mean(error_formula(targets, out))
        errors.append(loss)
        if e % (epochs / 10) == 0:
            print("\n========== Epoch", e,"==========")
            if last_loss and last_loss < loss:
                print("Train loss: ", loss, "  WARNING - Loss Increasing")
            else:
                print("Train loss: ", loss)
            last_loss = loss
            predictions = out > 0.5
            accuracy = np.mean(predictions == targets)
            print("Accuracy: ", accuracy)
        if graph_lines and e % (epochs / 100) == 0:
            display(-weights[0]/weights[1], -bias/weights[1])
            

    # Plotting the solution boundary
    plt.title("Solution boundary")
    display(-weights[0]/weights[1], -bias/weights[1], 'black')

    # Plotting the data
    plot_points(features, targets)
    plt.show()

    # Plotting the error
    plt.title("Error Plot")
    plt.xlabel('Number of epochs')
    plt.ylabel('Error')
    plt.plot(errors)
    plt.show()